
// Build the data infrastructure for the first-stage estimation: a panel of household
// heads with labor income, hours, wages, age, tax variables and control variables.


// Read in the CPI and store one deflator per year in the globals cpi_1999 ... cpi_2015.
// Monetary variables always refer to the previous period, so the deflator stored for
// year Y is the mean of cpi over the observations with year == Y-1.
// The path is quoted so that a data directory containing spaces still works.
use "${dat}cpi.dta", clear
quietly forvalues Y=1999(1)2015 {
	local prev = `Y' - 1
	// meanonly skips the statistics that are not needed; r(mean) and r(N) are still returned
	summarize cpi if year==`prev', meanonly
	global cpi_`Y'=r(mean)
	// With no matching observation r(mean) is missing, and dividing by it later would
	// silently turn every deflated variable into missing. Warn, but do not abort.
	if r(N)==0 {
		noisily display as error "warning: no CPI observation for year `prev'; global cpi_`Y' is missing"
	}
}
*

* Build the pooled panel one survey wave at a time (odd years 1999 to 2015).
* Each pass loads the post-TAXSIM file, keeps the current wave, applies the sample
* restrictions, constructs the analysis variables and adds the wave to panel_99_15.dta.

* remove any panel file left over from an earlier run (capture: no error if absent)
capture erase "${dat}panel_99_15.dta"

forvalues Y=1999(2)2015 {

	use "${dat}taxes_posttaxsim_171212.dta", clear

	* restrict to current intyear
	keep if intyr`Y'==`Y'
	* restrict vars: identifiers plus every variable whose name contains the wave year
	keep personid sex *`Y'*

	gen year=`Y'

	* strip the wave year from the variable names: first as a suffix,
	* then where it follows the t and c prefixes
	rename *`Y' *
	rename t`Y'* t*
	rename c`Y'* c*

	* keep heads only
	gen head=0
	replace head=1 if reltohd==10
	keep if head==1

	* heathcote restrictions: ages 25 to 60
	rename agehead age
	keep if inrange(age,25,60)

	* TRIMMING AT THE BOTTOM
	* At least working at the minimum wage (5.15 per hour) part-time, taken as roughly
	* 1000 hours per year (4h x 5d x 52w), i.e. deflated earnings above 5150.
	* Stata treats missing as larger than any number, so missing earnings must be
	* excluded explicitly; without the check they would pass this filter.
	gen help=yhd
	replace help=help/${cpi_`Y'}
	keep if help>5150 & !missing(help)
	drop help


	* restrict by hours: below 260 is too low, above 4000 is too high;
	* inrange is false for missing, so missing hours are dropped as before
	keep if inrange(hrs,260,4000)

	* NOTE(review): the original comment here read -drop female heads-, but the filter
	* is on marstat==3; confirm against the coding of marstat
	drop if marstat==3


	* how many remain?
	sum personid


	* EDITING VARIABLES FOR MAIN ANALYSIS

	* deflate the monetary variables by the CPI of the previous period
	foreach v of varlist yhd wgehd oth_inc {
		replace `v'=`v'/${cpi_`Y'}
	}

	* earnings
	rename yhd y
	gen ly=log(y)

	* hours
	rename hrs h
	gen lh=log(h)

	* wages
	rename wgehd w
	gen lw=log(w)

	* other income
	gen lo=log(oth_inc)

	* control variables

	* marital status: collapse codes 2 and 4 into 1, and code 5 into 0
	clonevar mstat=marstat
	recode mstat (2=1) (4=1) (5=0)

	* race
	rename racehd race

	* educ
	rename compleduc educ

	* year of birth
	rename yearborn yob

	* children
	rename intnumchd chd

	* recode industry and occupation variables to compressed categories
	* (forward slash and quotes: works on every platform and with spaces in the path)
	do "${main_dir}/ind_occ_edit.do"


	* weights
	replace fwgt=round(fwgt)


	* tax variables

	* pull variables: start from the head value and overwrite it with the joint value
	* when marstat is 1, 2 or 3
	* var7 pwages, var8 swages, var9 dividends, var10 otherprop,
	* var11 pensions, var12 gssi, var13 transfers
	foreach Z in var7 var8 var9 var10 var11 var12 var13 {
		gen tfu_`Z' = t`Z'head
		replace tfu_`Z' = t`Z'joint if inlist(marstat,1,2,3)
	}

	rename tfu_var13 transfers

	* same head or joint selection for the tax results
	foreach Z in txbl fica siitax fiitax {
		gen tfu_`Z' = thead_`Z'
		replace tfu_`Z' = tjoint_`Z' if inlist(marstat,1,2,3)
	}
	rename tfu_txbl y_txbl
	rename tfu_fica fica
	rename tfu_fiitax fiitax
	rename tfu_siitax siitax

	* pregov income: sum of the income components (missing counts as zero)
	* plus half of fica when fica is positive
	global incvars "tfu_var7 tfu_var8 tfu_var9 tfu_var10 tfu_var11 tfu_var12"
	egen y_gross=rowtotal($incvars)
	replace y_gross=y_gross + 0.5*fica if fica>0 & fica!=.

	* tax owed (rowtotal is the current name of the superseded rsum function)
	egen tax_liable=rowtotal(fiitax siitax fica)

	* postgov income
	* NOTE(review): tax_liable subtracts all of fica, half was added to y_gross and
	* another half is added back here, so fica cancels out of y_net when it is
	* positive; confirm that this is intended
	gen y_net=y_gross-tax_liable
	replace y_net=y_net + 0.5*fica if fica>0 & fica!=.
	replace y_net=y_net + transfers if transfers>0 & transfers!=.


	* marginal rates: federal plus state, head or joint as above
	gen mtrate_f=thead_frate
	replace mtrate_f=tjoint_frate if inlist(marstat,1,2,3)
	gen mtrate_s=thead_srate
	replace mtrate_s=tjoint_srate if inlist(marstat,1,2,3)
	gen mtrate=mtrate_f+mtrate_s

	* deflate the constructed income and tax aggregates
	* NOTE(review): fica and transfers themselves are not deflated here; confirm
	foreach v of varlist y_gross tax_liable y_net y_txbl {
		replace `v'=`v'/${cpi_`Y'}
	}

	* generate other income for tractable approach
	* (missing values are not below zero, so they survive both drops)
	gen oth_inc_tax=y_gross-y
	drop if oth_inc_tax<0
	drop if y_net<0

	* saving intermediates: the first wave creates the panel file,
	* every later wave is stacked on top of what has been saved so far
	if `Y'>1999 {
		append using "${dat}panel_99_15.dta"
	}
	save "${dat}panel_99_15.dta", replace

}
*

	* Create a set of indicator variables for each categorical control.
	* tabulate with generate() names them by appending 1, 2, ... to the stub,
	* here the letter d followed by the variable name.
	foreach ctrl of varlist race state indust occ yob year chd {
		quietly tabulate `ctrl', generate(d`ctrl')
	}

	* Retain only the analysis variables. The d* wildcard keeps the indicators
	* created above together with any other variable whose name starts with d.
	keep personid fwgt y ly h lh w lw age mstat educ sex ///
		race state indust occ yob year chd d* ///
		y_gross tax_liable y_txbl y_net oth_inc_tax ///
		fica transfers mtrate_f mtrate_s mtrate

* overwrite the pooled panel with the trimmed version
save ${dat}panel_99_15.dta, replace
